
# --------- R Code for Figure on the Left ---------

set.seed(123)  # For reproducibility

# Load libraries
library(ggplot2)
library(dplyr)
library(cubature)

# 2D mixture of Gaussians
p <- function(D, x1, x2) {
  mu <- D / (2 * sqrt(2))
  0.5 * dnorm(x1, mean = mu, sd = 1) * dnorm(x2, mean = mu, sd = 1) +
    0.5 * dnorm(x1, mean = -mu, sd = 1) * dnorm(x2, mean = -mu, sd = 1)
}

# Negative log-density
f <- function(D, x1, x2) {
  -log(p(D, x1, x2))
}

# Inverse temperature schedule
beta <- function(D) {
  beta1 <- 1 / D^2
  vec <- c(beta1)
  
  repeat {
    a <- 1
    b <- -10 / 4 * beta1
    c <- 5 / 4 * beta1^2
    root <- (-b + sqrt(b^2 - 4 * a * c)) / (2 * a)
    
    if (root >= 1) break
    vec <- c(vec, root)
    beta1 <- root
  }
  
  c(vec, 1)
}

# Normalization constant
Z <- function(D, beta_val) {
  integrand <- function(x) {
    p(D, x[1], x[2])^beta_val
  }
  
  center <- D / (2 * sqrt(2))
  support_radius <- 4
  lower <- c(-center - support_radius, -center - support_radius)
  upper <- c(center + support_radius, center + support_radius)
  
  result <- cubintegrate(
    f = integrand,
    lower = lower,
    upper = upper,
    relTol = 1e-8,
    absTol = 1e-12
  )
  
  result$integral
}

# Simulation parameters 

lambda     <- 0.5
iterations <- 20000
N          <- 15000
eta        <- 400
num_D <- seq(5, 40, by = 2.5)  # Range of D values for simulation

# Initialize matrices to store means and cumulative means
mean_x1          <- matrix(nrow = length(num_D), ncol = N)
mean_x2          <- matrix(nrow = length(num_D), ncol = N)
cumulative_avg_x1 <- matrix(nrow = length(num_D), ncol = N)
cumulative_avg_x2 <- matrix(nrow = length(num_D), ncol = N)

# Run simulation for each value of D
for (k in 1:length(num_D)) {
  D <- num_D[k]
  
  # Initialize matrices to store samples: x = (x1, x2)
  samples_matrix_x1 <- matrix(nrow = iterations, ncol = N)
  samples_matrix_x2 <- matrix(nrow = iterations, ncol = N)
  
  # Compute beta schedule and normalization constants
  beta_D <- beta(D)
  L      <- length(beta_D)
  Z_D    <- sapply(beta_D, function(b) Z(D, b))
  
  # Run STMH Chain
  for (iter in 1:iterations) {
    x1 <- 10
    x2 <- 10
    i  <- 1
    n  <- 1
    
    while (n <= N) {
      b <- rbinom(1, 1, lambda)
      
      if (b == 1) {
        # Propose temperature swap
        inew <- i + sample(c(-1, 1), size = 1)
        
        if (inew >= 1 && inew <= L) {
          fx <- f(D, x1, x2)
          
          swap_ratio <- if (is.finite(fx)) {
            (Z_D[i] * exp(-beta_D[inew] * fx)) / (Z_D[inew] * exp(-beta_D[i] * fx))
          } else {
            0
          }
          
          if (runif(1) <= min(1, swap_ratio)) {
            i <- inew
          }
        }
        
      } else {
        # Local Metropolis-Hastings move
        y1 <- rnorm(1, mean = x1, sd = sqrt(eta))
        y2 <- rnorm(1, mean = x2, sd = sqrt(eta))
        
        fx <- f(D, x1, x2)
        fy <- f(D, y1, y2)
        
        if (runif(1) <= min(1, exp(-beta_D[i] * (fy - fx)))) {
          x1 <- y1
          x2 <- y2
        }
      }
      
      # Store samples
      samples_matrix_x1[iter, n] <- x1
      samples_matrix_x2[iter, n] <- x2
      
      n <- n + 1
    }
  }
  
  # Compute means and cumulative means
  mean_x1[k, ] <- colMeans(samples_matrix_x1, na.rm = TRUE)
  mean_x2[k, ] <- colMeans(samples_matrix_x2, na.rm = TRUE)
  
  cumulative_avg_x1[k, ] <- cumsum(mean_x1[k, ]) / (1:N)
  cumulative_avg_x2[k, ] <- cumsum(mean_x2[k, ]) / (1:N)
  
}


# Initialize vectors to store number of steps until cumulative average drops below 0.1
result_x1 <- rep(0, length(num_D))
result_x2 <- rep(0, length(num_D))

# Compute the number of steps until cumulative average drops below 0.1
for (i in 1:length(num_D)) {
  result_x1[i] <- which(cumulative_avg_x1[i, ] < 0.1)[1]
  result_x2[i] <- which(cumulative_avg_x2[i, ] < 0.1)[1]
}



# --------- R Code for Figure on the Right ---------

set.seed(123)  # For reproducibility

# Load libraries
library(ggplot2)
library(dplyr)
library(cubature)

# 2D mixture of Gaussians
p <- function(D, x1, x2) {
  mu <- D / (2 * sqrt(2))
  0.5 * dnorm(x1, mean = mu, sd = 1) * dnorm(x2, mean = mu, sd = 1) +
    0.5 * dnorm(x1, mean = -mu, sd = 1) * dnorm(x2, mean = -mu, sd = 1)
}

# Negative log-density
f <- function(D, x1, x2) {
  -log(p(D, x1, x2))
}

# Inverse temperature schedule
beta <- function(D) {
  beta1 <- 1 / D^2
  vec <- c(beta1)
  
  repeat {
    a <- 1
    b <- -10 / 4 * beta1
    c <- 5 / 4 * beta1^2
    root <- (-b + sqrt(b^2 - 4 * a * c)) / (2 * a)
    
    if (root >= 1) break
    vec <- c(vec, root)
    beta1 <- root
  }
  
  c(vec, 1)
}

# Normalization constant
Z <- function(D, beta_val) {
  integrand <- function(x) {
    p(D, x[1], x[2])^beta_val
  }
  
  center <- D / (2 * sqrt(2))
  support_radius <- 4
  lower <- c(-center - support_radius, -center - support_radius)
  upper <- c(center + support_radius, center + support_radius)
  
  result <- cubintegrate(
    f = integrand,
    lower = lower,
    upper = upper,
    relTol = 1e-8,
    absTol = 1e-12
  )
  
  result$integral
}

# Simulation parameters
lambda     <- 0.5
iterations <- 20000
N          <- 5000
eta        <- 400
D          <- 30

# Compute beta schedule and normalization constants
beta_D <- beta(D)
L      <- length(beta_D)
Z_D    <- sapply(beta_D, function(b) Z(D, b))

# Initialize matrices to store samples: x = (x1, x2) 
samples_matrix_x1 <- matrix(nrow = iterations, ncol = N)
samples_matrix_x2 <- matrix(nrow = iterations, ncol = N)

# Run STMH chain
for (iter in 1:iterations) {
  x1 <- 10
  x2 <- 10
  i  <- 1
  n  <- 1
  
  while (n <= N) {
    b <- rbinom(1, 1, lambda)
    
    if (b == 1) {
      # Propose temperature swap
      inew <- i + sample(c(-1, 1), size = 1)
      
      if (inew >= 1 && inew <= L) {
        fx <- f(D, x1, x2)
        
        if (is.finite(fx)) {
          swap_ratio <- (Z_D[i] * exp(-beta_D[inew] * fx)) / (Z_D[inew] * exp(-beta_D[i] * fx))
        } else {
          swap_ratio <- 0
        }
        
        if (runif(1) <= min(1, swap_ratio)) {
          i <- inew
        }
      }
      
    } else {
      # Local Metropolis-Hastings move
      y1 <- rnorm(1, mean = x1, sd = sqrt(eta))
      y2 <- rnorm(1, mean = x2, sd = sqrt(eta))
      
      fx <- f(D, x1, x2)
      fy <- f(D, y1, y2)
      
      if (runif(1) <= min(1, exp(-beta_D[i] * (fy - fx)))) {
        x1 <- y1
        x2 <- y2
      }
    }
    
    # Store current sample
    samples_matrix_x1[iter, n] <- x1
    samples_matrix_x2[iter, n] <- x2
    
    n <- n + 1
  }
}

# ----- Convergence Analysis -----
mean_x1 <- colMeans(samples_matrix_x1)
mean_x2 <- colMeans(samples_matrix_x2)

cumulative_avg_x1 <- cumsum(mean_x1) / (1:N)
cumulative_avg_x2 <- cumsum(mean_x2) / (1:N)




